Set up

Packages

require(tidyr)
require(dplyr)
require(magrittr)
require(ggplot2)
require(bayesplot)
require(ape)
require(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
require(brms)
require(ggmcmc)
require(knitr)
require(phytools)
require(cowplot)
require(RColorBrewer)
require(ggtree)
require(car)
require(xtable)
require(phytools)

# Functions
nrmse <-  function(obs, pred, type = "sd") {
  
    squared_sums <- sum((obs - pred)^2)
    mse <- squared_sums/length(obs)
    rmse <- sqrt(mse)
    if (type == "sd") nrmse <- rmse/sd(obs)
    if (type == "mean") nrmse <- rmse/mean(obs)
    if (type == "maxmin") nrmse <- rmse/ (max(obs) - min(obs))
    if (type == "iq") nrmse <- rmse/ (quantile(obs, 0.75) - quantile(obs, 0.25))
    if (!type %in% c("mean", "sd", "maxmin", "iq")) message("Wrong type!")
    nrmse <- round(nrmse, 3)
 return(nrmse)
    
}

rmse <- function(obs, pred){
    sqrt(mean((obs - pred)^2))
}

inv_logit <- function(x) {exp(x)/(1+exp(x))}

Loading data

data <- read.csv("./sp.threat.nat.csv", as.is=F)

nrow(data)# 395
range(data$vetted)# 0-3346
nrow(data[data$vetted>0,])# 317

### Magellon tree
tree <- read.tree("./Ramirez_etal_pruned.tre")

length(tree$tip.label)# 442

# These families we have data for, but are not in tree
setdiff(data$family, tree$tip.label) 

# Getting family age
min_above_zero <- function(x){

  if (length(x)>1) {
    min(x[x>0])
  } else {0}

}

# Calculate family age
coph <- cophenetic(tree)

fam_age <- sapply(rownames(coph), function(fam) 
          min_above_zero(coph[rownames(coph)%in%fam,])
          ) 

ages <- data.frame(family=names(fam_age), age=as.numeric(fam_age)/2)
ages %<>% arrange(family)
data <- left_join(data,ages)

# Calculating simple diversification metric - log(N+1)/age
data$div_simple <- with(data, log(species)/age)

## Calculating Method of Moments diversification
# https://bio.libretexts.org/Bookshelves/Evolutionary_Developmental_Biology/Book%3A_Phylogenetic_Comparative_Methods_(Harmon)/11%3A_Fitting_Birth-Death_Models/11.02%3A_Clade_Age_and_Diversity

#extract family stem ages
ages<-NULL
for (x in 1: length(tree$tip.label)){
sp<-tree$tip.label[x]
edge<-tree$edge.length[tree$edge[,2]==x]
ages<-rbind(ages,(data.frame(sp,edge)))
}

data.rates <- merge(data, ages,by.x="family", by.y = "sp")
#eugh my numerics edge lengths have been converted to factors
data.rates$edge<-as.numeric(as.character(data.rates$edge))

#sanity check
# head(ages[order(ages$sp),])
# head(data.rates)

ages[ages$edge==max(ages$edge),]

#Calculate rates assuming different extinction fractions (e)
e<-0
r0<-log(data.rates$species*(1-e)+e)/data.rates$edge

e<-0.5
r0.5<-log(data.rates$species*(1-e)+e)/data.rates$edge

e<-0.9
r0.9<-log(data.rates$species*(1-e)+e)/data.rates$edge

#just for a quick look-see
# plot(r0, r0.9)

data<-cbind(data.rates, r0, r0.5, r0.9)
pairs(data[,c("r0","r0.5","r0.9")])

unique(data[data$div_simple==max(data$div_simple),])

data$div_mom <- data$r0.9

with(data, plot(div_simple, div_mom))

with(data, cor(div_simple, div_mom))#0.974

# Setting diversification as div_simple
data$diversification <- data$div_simple

# creating another family level variable (for non-phylogenetic family level effects)
data$family_name <- data$family

# These families we have data for, but are not in tree
setdiff(data$family, tree$tip.label) 

# Identify families in tree with no data
setdiff(tree$tip.label, data$family)

# remove any missing families in data
data <- data[data$family%in%tree$tip.label,]

# remove any families in tree with no data 
tree <- drop.tip(tree, setdiff(tree$tip.label, as.character(data$family)))

# phylogenetic correlation structure
phy_cov <- vcv(tree, corr=TRUE)

nrow(data)#395
nrow(data[data$vetted>0,])#317

# instances where threatened is greater than vetted
# nrow(data[data$threatened>data$vetted,])#0

# instances where threatened is greater than species
# nrow(data[data$threatened>data$species,])#0

Transforming predictors

# Code for scale and unscale with mean=0 sd=0.5 (following Gelman 2008)
scale_half <- function(x){
  return( (x-mean(x)) * 0.5/sd(x))
}

unscale_half <- function(x,original){
    x*sd(original)*2 + mean(original)  
}

# Converting hybrids to proportion
data$hybrids <- data$hybrids/data$species

# Scaling continuous predictors
data$age_scaled <- scale_half(log(data$age))
data$species_scaled <- scale_half(log(data$species))
data$diversification_scaled <- scale_half(log(data$diversification+1))
data$diversification_mom_scaled <- scale_half(log(data$div_mom+1))
data$naturalized_scaled <- scale_half(log(data$naturalized+1))
data$threatened_scaled <- scale_half(log(data$threatened+1))
data$hort.species <- scale_half(log(data$hort.species+1))

data$hybrids <- scale_half(data$hybrids)
data$range.size <- scale_half(sqrt(data$range.size))

# Subsetting to IUCN vetted species (for threat models)
data_vetted <- data[data$vetted>0,]
tree_vetted <- drop.tip(tree, setdiff(tree$tip.label, as.character(data_vetted$family)))
phy_cov_vetted <- vcv(tree_vetted, corr=TRUE)

Assessing multicollinearity

vif(lm(threatened ~ naturalized_scaled + diversification_scaled + age_scaled + range.size, data=data_vetted))
##     naturalized_scaled diversification_scaled             age_scaled 
##               3.529148               3.496445               1.674901 
##             range.size 
##               3.734682
vif(lm(threatened ~ diversification_scaled + age_scaled +  hybrids + climate.sum + animal + asexual + annual + fleshy.fruit + herbaceous + range.size, data=data_vetted))
## diversification_scaled             age_scaled                hybrids 
##               3.315630               1.687157               1.137351 
##            climate.sum                 animal                asexual 
##               1.726952               1.108766               1.268796 
##                 annual           fleshy.fruit             herbaceous 
##               1.753944               1.150634               1.393140 
##             range.size 
##               3.417065
vif(lm(naturalized ~ threatened_scaled + age_scaled +  hybrids + climate.sum + animal + asexual + annual + fleshy.fruit + herbaceous + range.size, data=data_vetted))
## threatened_scaled        age_scaled           hybrids       climate.sum 
##          1.935678          1.020602          1.147107          1.697694 
##            animal           asexual            annual      fleshy.fruit 
##          1.104978          1.302405          1.756528          1.154597 
##        herbaceous        range.size 
##          1.423269          2.988637
# with horticultural species
vif(lm(threatened ~ diversification_scaled + age_scaled +  hybrids + climate.sum + animal + asexual + annual + fleshy.fruit + herbaceous + range.size + hort.species, data=data_vetted))
## diversification_scaled             age_scaled                hybrids 
##               3.854150               1.741455               1.142688 
##            climate.sum                 animal                asexual 
##               1.789898               1.130522               1.334724 
##                 annual           fleshy.fruit             herbaceous 
##               1.822324               1.152992               1.393760 
##             range.size           hort.species 
##               4.172669               4.766442
vif(lm(naturalized ~ threatened_scaled + age_scaled +  hybrids + climate.sum + animal + asexual + annual + fleshy.fruit + herbaceous + range.size + hort.species, data=data_vetted))
## threatened_scaled        age_scaled           hybrids       climate.sum 
##          2.330926          1.033480          1.157313          1.794180 
##            animal           asexual            annual      fleshy.fruit 
##          1.125600          1.351916          1.846114          1.155710 
##        herbaceous        range.size      hort.species 
##          1.438503          3.870661          4.937729
with(data_vetted, cor(diversification_scaled, range.size))
## [1] 0.6882094
# full dataset
vif(lm(threatened ~ diversification_scaled + age_scaled +  hybrids + climate.sum + animal + asexual + annual + fleshy.fruit + herbaceous + range.size + hort.species, data=data))
## diversification_scaled             age_scaled                hybrids 
##               3.693999               1.509428               1.134472 
##            climate.sum                 animal                asexual 
##               1.742312               1.102910               1.362397 
##                 annual           fleshy.fruit             herbaceous 
##               1.734637               1.116877               1.276750 
##             range.size           hort.species 
##               4.503627               4.717285
vif(lm(naturalized ~ diversification_scaled + age_scaled +  hybrids + climate.sum + animal + asexual + annual + fleshy.fruit + herbaceous + range.size + hort.species, data=data))
## diversification_scaled             age_scaled                hybrids 
##               3.693999               1.509428               1.134472 
##            climate.sum                 animal                asexual 
##               1.742312               1.102910               1.362397 
##                 annual           fleshy.fruit             herbaceous 
##               1.734637               1.116877               1.276750 
##             range.size           hort.species 
##               4.503627               4.717285
with(data, plot(diversification_scaled ~ range.size))

with(data, cor(diversification_scaled, range.size))#0.726
## [1] 0.7264469

   

Exploration of priors

Visualization of brms default priors (blue) compared to custom priors (yellow) inspired by McElreath’s book Statistical Rethinking.

Priors for the slope parameters:

prior <- runif(n=10000, min=-100, max=100) # brms default is actually -Inf to +Inf
p <- inv_logit(prior)
prior2 <- rnorm(n=10000, 0, 1.5) # McElreath
p2 <- inv_logit(prior2)

plot(density(p, adj=0.1), xlim=c(0,1), col="#56B4E9", main="", xlab=expression(paste("inv_logit (",beta,")")))
lines(density(p2, adj=0.1), col="#E69F00")

The default improper uniform prior used by brms is flat on the log-odd scale, so when converted to the probabilty scale, puts excess mass near 0 and 1. A Normal(0, 1.5) prior is more reasonable, having a flatter distribution with a mode at 0.5 on the probablity scale.

Priors for the family-level variance parameters:

prior <- rstudent_t(n=10000, df=3, mu=0, sigma=10) # brms default
prior <- prior[prior>0]
# p <- inv_logit(prior)

prior2 <- rnorm(n=10000, 0, 1) # McElreath
prior2 <- prior2[prior2>0]
# p2 <- inv_logit(prior2)

plot(density(prior2, adj=0.1), xlim=c(0,10), col="#E69F00", main="", xlab=expression(paste(sigma)))
lines(density(prior, adj=0.1), col="#56B4E9")

## png 
##   2

The brms prior is extremely long tailed. Considering we are fitting a non-linear model in which changes in log-odds over over 4 have a diminishing influence due to the ceiling effect of the binomial distribution. With this in mind, a prior of Normal(0,1) constrains the posterior to a more realistic range, and is likely to improve sampling efficiency.

Threat models (with IUCN evaluated species)

Simple threat model - brms default priors

if (!file.exists("./vet_threatened_naturalized_10k.rds")) {

  vet_threatened_nat <- brm(threatened | trials(vetted) ~ naturalized_scaled + diversification_scaled + age_scaled + range.size + (1|family) + (1|family_name), 
    data=data_vetted, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
    control=list(adapt_delta=0.95,max_treedepth=12), cores=4)

  saveRDS(vet_threatened_nat, "./vet_threatened_naturalized_10k.rds")

} else { vet_threatened_nat <- readRDS("./vet_threatened_naturalized_10k.rds")}

# prior_summary(vet_threatened_nat, all=TRUE)
# Default if for  b coefficients to have improper priors

outcome <- summary(vet_threatened_nat$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Number of naturalized species","Diversification rate","Family age","Range size", "SD brownian effects","SD family specific effects")

vet_threatened_nat_plot <- stanplot(vet_threatened_nat, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
vet_threatened_nat_plot

pdf("./plots_tables/vet_threatened_nat_plot.pdf", width=5, height=4)
vet_threatened_nat_plot + ggtitle("")
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Number of naturalized species -0.35 0.00 0.21 -0.76 -0.50 -0.36 -0.21 0.05 2691.46 1.00
Diversification rate 1.81 0.01 0.30 1.23 1.61 1.80 2.01 2.39 1517.16 1.00
Family age 1.03 0.00 0.23 0.57 0.88 1.03 1.18 1.47 3247.56 1.00
Range size -0.70 0.00 0.23 -1.16 -0.86 -0.70 -0.54 -0.24 4107.40 1.00
SD brownian effects 0.61 0.01 0.24 0.08 0.46 0.63 0.78 1.01 292.19 1.02
SD family specific effects 0.59 0.01 0.18 0.14 0.49 0.62 0.72 0.87 296.54 1.02
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_threatened_nat.tex")

##Posterior predictive checks
pp_vet_threatened_nat <- brms::posterior_predict(vet_threatened_nat)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_threatened_nat$data$threatened, pp_vet_threatened_nat[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_vet_threatened_nat <- apply(pp_vet_threatened_nat, 1, function(x) nrmse(pred=x,obs=vet_threatened_nat$data$threatened, type="iq"))
mean(nrmse_vet_threatened_nat)
## [1] 0.3117106
sd(nrmse_vet_threatened_nat)
## [1] 0.0357692
# RMSE
rmse_vet_threatened_nat <- apply(pp_vet_threatened_nat, 1, function(x) rmse(pred=x,obs=vet_threatened_nat$data$threatened))
mean(rmse_vet_threatened_nat)
## [1] 6.545878
sd(rmse_vet_threatened_nat)
## [1] 0.7511597
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(vet_threatened_nat, hyp, class = NULL))
## Hypothesis Tests for class :
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0      0.5      0.27     0.01     0.98         NA
##   Post.Prob Star
## 1        NA    *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

Simple threat model - prior / posterior plots

# Plotting influence of prior on posterior for a beta parameter
# on the log-odds scale

tibble(x = seq(from = -10, to = 10, by = .01)) %>%   

  ggplot() +
  # the prior
  # improper uniform prior restricted to (-10,10) to aid visualization
  geom_ribbon(aes(x = x, ymin = 0, ymax = dunif(x,-10,10)),
              fill = "#A9A9A9", alpha = 1/2) +
  # the posterior
  geom_density(data = posterior_samples(vet_threatened_nat),
               aes(x = b_age_scaled), 
               fill = "#56B4E9", size = 0, alpha = 1/2) +
  scale_y_continuous() +
  coord_cartesian(xlim = c(-5, 5)) +
  labs(x="Beta Age (brms priors)", y="Density") + 
  theme_bw() -> prior_post_1_beta

prior_post_1_beta

# Plotting influence of prior on posterior for a sigma parameter

# SD Family Effect
tibble(x = seq(from = 0, to = 6, by = .01)) %>%   
  ggplot() +
  # the prior
  geom_ribbon(aes(x = x, ymin = 0, ymax = dstudent_t(x, df=3, mu=0, sigma=10)),
              fill = "#A9A9A9", alpha = 1/2) +
  # the posterior
  geom_density(data = posterior_samples(vet_threatened_nat),
               aes(x = sd_family_name__Intercept), 
               fill = "#56B4E9", size = 0, alpha = 1/2) +
  scale_y_continuous() +
  coord_cartesian(xlim = c(0, 5)) +
  labs(x="SD Family Effects (brms priors)", y="Density") + 
  theme_bw() -> prior_post_1_sigma

prior_post_1_sigma

Simple threat model - custom priors (based on McElreath)

if (!file.exists("./vet_threatened_naturalized_p2_10k.rds")) {

  vet_threatened_nat_p2 <- brm(threatened | trials(vetted) ~ naturalized_scaled + diversification_scaled + age_scaled + range.size + (1|family) + (1|family_name), 
    data=data_vetted, family=binomial(),

    # custom priors based on McElreath
    prior = c(prior(normal(0, 1), class = Intercept),
                prior(normal(0, 1.5), class = b),
                prior(normal(0, 1), class = sd)),

    iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
    control=list(adapt_delta=0.95,max_treedepth=12), cores=4)

  saveRDS(vet_threatened_nat_p2, "./vet_threatened_naturalized_p2_10k.rds")

} else { vet_threatened_nat_p2 <- readRDS("./vet_threatened_naturalized_p2_10k.rds")}

# prior_summary(vet_threatened_nat_p2, all=TRUE)

outcome <- summary(vet_threatened_nat_p2$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Number of naturalized species","Diversification rate","Family age","Range size", "SD brownian effects","SD family specific effects")

vet_threatened_nat_p2_plot <- stanplot(vet_threatened_nat_p2, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
vet_threatened_nat_p2_plot

pdf("./plots_tables/vet_threatened_nat_p2_plot.pdf", width=5, height=4)
vet_threatened_nat_p2_plot + ggtitle("")
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Number of naturalized species -0.33 0.00 0.20 -0.73 -0.47 -0.33 -0.19 0.07 3778.16 1.00
Diversification rate 1.71 0.01 0.29 1.15 1.51 1.70 1.91 2.27 2032.84 1.00
Family age 0.95 0.00 0.23 0.51 0.80 0.95 1.11 1.40 3338.78 1.00
Range size -0.66 0.00 0.23 -1.11 -0.82 -0.66 -0.50 -0.21 3707.12 1.00
SD brownian effects 0.63 0.01 0.24 0.10 0.49 0.66 0.81 1.02 518.90 1.01
SD family specific effects 0.57 0.01 0.19 0.10 0.46 0.60 0.70 0.85 494.49 1.01
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_threatened_nat_p2.tex")

##Posterior predictive checks
pp_vet_threatened_nat_p2 <- brms::posterior_predict(vet_threatened_nat_p2)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_threatened_nat_p2$data$threatened, pp_vet_threatened_nat_p2[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_vet_threatened_nat_p2 <- apply(pp_vet_threatened_nat_p2, 1, function(x) nrmse(pred=x,obs=vet_threatened_nat_p2$data$threatened, type="iq"))
mean(nrmse_vet_threatened_nat_p2)
## [1] 0.311648
sd(nrmse_vet_threatened_nat_p2)
## [1] 0.03609379
# RMSE
rmse_vet_threatened_nat_p2 <- apply(pp_vet_threatened_nat_p2, 1, function(x) rmse(pred=x,obs=vet_threatened_nat_p2$data$threatened))
mean(rmse_vet_threatened_nat_p2)
## [1] 6.544551
sd(rmse_vet_threatened_nat_p2)
## [1] 0.7578132
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(vet_threatened_nat_p2, hyp, class = NULL))
## Hypothesis Tests for class :
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0     0.53      0.27     0.01     0.99         NA
##   Post.Prob Star
## 1        NA    *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
# Plotting influence of prior on posterior for a beta parameter
# on the log-odds scale

tibble(x = seq(from = -10, to = 10, by = .01)) %>%   
  ggplot() +
  # the prior
  geom_ribbon(aes(x = x, ymin = 0, ymax = dnorm(x,0,1.5)),
              fill = "#A9A9A9", alpha = 1/2) +
  # the posterior
  geom_density(data = posterior_samples(vet_threatened_nat_p2),
               aes(x = b_age_scaled), 
               fill = "#E69F00", size = 0, alpha = 1/2) +
  scale_y_continuous() +
  coord_cartesian(xlim = c(-5, 5)) +
  labs(x="Beta Age - custom priors", y="Density") + 
  theme_bw() -> prior_post_2_beta

prior_post_2_beta

# Plotting influence of prior on posterior for a sigma parameter

# SD Family Effect
tibble(x = seq(from = 0, to = 6, by = .01)) %>%   
  ggplot() +
  # the prior
  geom_ribbon(aes(x = x, ymin = 0, ymax = dnorm(x, 0,1)),
              fill = "#A9A9A9", alpha = 1/2) +
  # the posterior
  geom_density(data = posterior_samples(vet_threatened_nat_p2),
               aes(x = sd_family_name__Intercept), 
               fill = "#E69F00", size = 0, alpha = 1/2) +
  scale_y_continuous() +
  coord_cartesian(xlim = c(0, 5)) +
  labs(x="SD Family Effects (brms priors)", y="Density") + 
  theme_bw() -> prior_post_2_sigma

prior_post_2_sigma

Full threat model (IUCN vetted species) - brms default priors

if (!file.exists("./vet_threatened_10k.rds")) {

  vet_threatened <- brm(threatened | trials(vetted) ~ diversification_scaled + age_scaled + range.size + hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual + (1|family) + (1|family_name), 
    data=data_vetted, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
    control=list(adapt_delta=0.98,max_treedepth=13),cores=4)
    # no divergent transitions

  saveRDS(vet_threatened, "./vet_threatened_10k.rds")

} else { vet_threatened <- readRDS("./vet_threatened_10k.rds")}

# prior_summary(vet_threatened, all=FALSE)

outcome <- summary(vet_threatened$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual","SD brownian effects","SD family specific effects")

vet_threatened_plot <- stanplot(vet_threatened, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion threatened species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
vet_threatened_plot

pdf("./plots_tables/vet_threatened_plot.pdf", width=5, height=4)
vet_threatened_plot + ggtitle("")
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate 1.68 0.00 0.27 1.16 1.51 1.68 1.86 2.21 3760.75 1.00
Famly age 0.95 0.00 0.22 0.51 0.80 0.96 1.11 1.39 4251.86 1.00
Range size -0.77 0.00 0.24 -1.24 -0.92 -0.77 -0.61 -0.30 4574.07 1.00
Hybrids -0.22 0.00 0.12 -0.46 -0.30 -0.22 -0.14 0.01 5022.40 1.00
Animal pollinated 0.67 0.00 0.21 0.27 0.53 0.67 0.82 1.08 4168.52 1.00
Fleshy fruit -0.10 0.00 0.20 -0.48 -0.24 -0.10 0.03 0.29 4642.28 1.00
Climate sum -0.05 0.00 0.08 -0.21 -0.10 -0.05 0.01 0.12 4884.11 1.00
Herbaceous -0.03 0.00 0.18 -0.37 -0.15 -0.03 0.09 0.32 4949.48 1.00
Annual 0.07 0.00 0.17 -0.26 -0.04 0.07 0.18 0.39 4593.64 1.00
Axsexual -0.12 0.00 0.14 -0.39 -0.22 -0.12 -0.02 0.17 4539.44 1.00
SD brownian effects 0.51 0.01 0.24 0.05 0.33 0.52 0.70 0.96 562.12 1.01
SD family specific effects 0.63 0.01 0.16 0.22 0.54 0.65 0.74 0.85 628.17 1.01
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_threatened.tex")

##Posterior predictive checks
# first make posterior predictions
pp_threatened <- brms::posterior_predict(vet_threatened)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_threatened$data$threatened, pp_threatened[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_vet <- apply(pp_threatened, 1, function(x) nrmse(pred=x,obs=vet_threatened$data$threatened, type="iq"))
mean(nrmse_vet)
## [1] 0.311238
sd(nrmse_vet)
## [1] 0.035023
# RMSE
rmse_vet <- apply(pp_threatened, 1, function(x) rmse(pred=x,obs=vet_threatened$data$threatened))
mean(rmse_vet)
## [1] 6.535986
sd(rmse_vet)
## [1] 0.7354639
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(vet_threatened, hyp, class = NULL))
## Hypothesis Tests for class :
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0      0.4      0.27        0     0.95         NA
##   Post.Prob Star
## 1        NA    *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
# plot(hyp)

Full threat model (IUCN vetted species) - brms prior / posterior plots

# Plotting influence of prior on posterior for a beta parameter
# on the log-odds scale

tibble(x = seq(from = -10, to = 10, by = .01)) %>%   
  ggplot() +
  # the prior
  # improper uniform prior restricted to (-10,10) to aid visualization
  geom_ribbon(aes(x = x, ymin = 0, ymax = dunif(x,-10,10)),
              fill = "#A9A9A9", alpha = 1/2) +
  # the posterior
  geom_density(data = posterior_samples(vet_threatened),
               aes(x = b_age_scaled), 
               fill = "#56B4E9", size = 0, alpha = 1/2) +
  scale_y_continuous() +
  coord_cartesian(xlim = c(-5, 5)) +
  labs(x="Beta Age (brms priors)", y="Density") + 
  theme_bw() -> prior_post_3_beta

prior_post_3_beta

# Plotting influence of prior on posterior for a sigma parameter

# SD Family Effect
tibble(x = seq(from = 0, to = 6, by = .01)) %>%   
  ggplot() +
  # the prior
  geom_ribbon(aes(x = x, ymin = 0, ymax = dstudent_t(x, df=3, mu=0, sigma=10)),
              fill = "#A9A9A9", alpha = 1/2) +
  # the posterior
  geom_density(data = posterior_samples(vet_threatened),
               aes(x = sd_family_name__Intercept), 
               fill = "#56B4E9", size = 0, alpha = 1/2) +
  scale_y_continuous() +
  coord_cartesian(xlim = c(0, 5)) +
  labs(x="SD Family Effects (brms priors)", y="Density") + 
  theme_bw() -> prior_post_3_sigma

prior_post_3_sigma

Full threat model (IUCN vetted species) - custom priors (based on McElreath)

if (!file.exists("./vet_threatened_p2_10k.rds")) {

  vet_threatened_p2 <- brm(threatened | trials(vetted) ~ diversification_scaled + age_scaled + range.size + hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual + (1|family) + (1|family_name), 

    # custom priors based on McElreath
    prior = c(prior(normal(0, 1), class = Intercept),
                prior(normal(0, 1.5), class = b),
                prior(normal(0, 1), class = sd)),

    data=data_vetted, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
    control=list(adapt_delta=0.98,max_treedepth=13),cores=4)

  # no divergent transitions

  saveRDS(vet_threatened_p2, "./vet_threatened_p2_10k.rds")

} else { vet_threatened_p2 <- readRDS("./vet_threatened_p2_10k.rds")}

# summary(vet_threatened_p2)
# prior_summary(vet_threatened_p2, all=FALSE)

outcome <- summary(vet_threatened_p2$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual","SD brownian effects","SD family specific effects")

vet_threatened_p2_plot <- stanplot(vet_threatened_p2, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion threatened species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
vet_threatened_p2_plot

pdf("./plots_tables/vet_threatened_p2_plot.pdf", width=5, height=4)
vet_threatened_p2_plot + ggtitle("")
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate 1.60 0.00 0.26 1.09 1.43 1.60 1.77 2.11 3227.17 1.00
Famly age 0.90 0.00 0.21 0.47 0.75 0.90 1.04 1.31 3598.23 1.00
Range size -0.72 0.00 0.23 -1.17 -0.88 -0.72 -0.57 -0.28 4081.70 1.00
Hybrids -0.22 0.00 0.12 -0.46 -0.30 -0.22 -0.14 0.01 4785.79 1.00
Animal pollinated 0.66 0.00 0.21 0.26 0.52 0.66 0.80 1.06 4431.56 1.00
Fleshy fruit -0.09 0.00 0.19 -0.46 -0.22 -0.09 0.04 0.29 4559.50 1.00
Climate sum -0.04 0.00 0.08 -0.21 -0.10 -0.04 0.01 0.12 4616.43 1.00
Herbaceous -0.03 0.00 0.17 -0.37 -0.14 -0.02 0.09 0.31 4648.53 1.00
Annual 0.06 0.00 0.16 -0.26 -0.05 0.05 0.17 0.38 4984.73 1.00
Axsexual -0.11 0.00 0.14 -0.39 -0.20 -0.11 -0.01 0.18 4559.65 1.00
SD brownian effects 0.50 0.02 0.24 0.04 0.31 0.52 0.68 0.94 225.51 1.01
SD family specific effects 0.63 0.01 0.16 0.23 0.55 0.65 0.74 0.85 261.73 1.01
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_threatened_p2.tex")

##Posterior predictive checks
# first make posterior predictions
pp_threatened <- brms::posterior_predict(vet_threatened_p2)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_threatened_p2$data$threatened, pp_threatened[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_vet <- apply(pp_threatened, 1, function(x) nrmse(pred=x,obs=vet_threatened_p2$data$threatened, type="iq"))
mean(nrmse_vet)
## [1] 0.3111226
sd(nrmse_vet)
## [1] 0.03603959
# RMSE
rmse_vet <- apply(pp_threatened, 1, function(x) rmse(pred=x,obs=vet_threatened_p2$data$threatened))
mean(rmse_vet)
## [1] 6.533531
sd(rmse_vet)
## [1] 0.7569331
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(vet_threatened_p2, hyp, class = NULL))
## Hypothesis Tests for class :
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0      0.4      0.27        0     0.94         NA
##   Post.Prob Star
## 1        NA    *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
# plot(hyp)

Full threat model (IUCN vetted species) - custom prior / posterior plots

# Plotting influence of prior on posterior for a beta parameter
# on the log-odds scale

tibble(x = seq(from = -10, to = 10, by = .01)) %>%   
  ggplot() +
  # the prior
  # improper uniform prior restricted to (-10,10) to aid visualization
  geom_ribbon(aes(x = x, ymin = 0, ymax = dunif(x,-10,10)),
              fill = "#A9A9A9", alpha = 1/2) +
  # the posterior
  geom_density(data = posterior_samples(vet_threatened_p2),
               aes(x = b_age_scaled), 
               fill = "#E69F00", size = 0, alpha = 1/2) +
  scale_y_continuous() +
  coord_cartesian(xlim = c(-5, 5)) +
  labs(x="Beta Age (brms priors)", y="Density") + 
  theme_bw() -> prior_post_4_beta

prior_post_4_beta

# Plotting influence of prior on posterior for a sigma parameter

# SD Family Effect
tibble(x = seq(from = 0, to = 6, by = .01)) %>%   
  ggplot() +
  # the prior
  geom_ribbon(aes(x = x, ymin = 0, ymax = dstudent_t(x, df=3, mu=0, sigma=10)),
              fill = "#A9A9A9", alpha = 1/2) +
  # the posterior
  geom_density(data = posterior_samples(vet_threatened_p2),
               aes(x = sd_family_name__Intercept), 
               fill = "#E69F00", size = 0, alpha = 1/2) +
  scale_y_continuous() +
  coord_cartesian(xlim = c(0, 5)) +
  labs(x="SD Family Effects (brms priors)", y="Density") + 
  theme_bw() -> prior_post_4_sigma

prior_post_4_sigma

Naturalized model

if (!file.exists("./fit_nat_10k.rds")) {

  fit_nat <- brm(naturalized | trials(species) ~ diversification_scaled + age_scaled + range.size + hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual + (1|family) + (1|family_name), 
    data=data, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov),
    control=list(adapt_delta=0.95, max_treedepth=11),cores=4)

  saveRDS(fit_nat, "./fit_nat_10k.rds")

} else { fit_nat <- readRDS("./fit_nat_10k.rds")}

outcome <- summary(fit_nat$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual","SD brownian effects","SD family specific effects")

fit_nat_plot <- stanplot(fit_nat, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))+ ggtitle("Proportion of naturalized species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
fit_nat_plot

pdf("./plots_tables/fit_nat_plot.pdf", width=5, height=4)
fit_nat_plot + ggtitle("")
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate -2.34 0.00 0.28 -2.89 -2.53 -2.34 -2.16 -1.81 4678.85 1
Famly age -1.51 0.00 0.25 -2.00 -1.69 -1.51 -1.35 -1.04 5007.11 1
Range size 1.61 0.00 0.27 1.10 1.43 1.61 1.79 2.15 4489.47 1
Hybrids 0.27 0.00 0.13 0.02 0.18 0.27 0.35 0.52 5005.29 1
Animal pollinated -0.41 0.00 0.27 -0.94 -0.59 -0.41 -0.23 0.12 4966.08 1
Fleshy fruit -0.07 0.00 0.24 -0.55 -0.23 -0.06 0.09 0.39 4740.17 1
Climate sum 0.18 0.00 0.10 -0.03 0.11 0.17 0.25 0.38 4295.60 1
Herbaceous 0.65 0.00 0.22 0.23 0.50 0.65 0.80 1.09 5005.66 1
Annual 0.08 0.00 0.20 -0.31 -0.05 0.08 0.21 0.47 4787.92 1
Axsexual 0.36 0.00 0.18 0.01 0.24 0.36 0.48 0.71 5067.27 1
SD brownian effects 1.19 0.01 0.18 0.79 1.07 1.21 1.32 1.49 895.76 1
SD family specific effects 0.51 0.01 0.23 0.05 0.36 0.53 0.68 0.91 620.61 1
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/fit_nat.tex")

##Posterior predictive checks
pp_nat <- brms::posterior_predict(fit_nat)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=fit_nat$data$naturalized, pp_nat[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_nat <- apply(pp_nat, 1, function(x) nrmse(pred=x,obs=fit_nat$data$naturalized, type="iq"))
mean(nrmse_nat)
## [1] 0.511576
sd(nrmse_nat)
## [1] 0.06611309
# RMSE
rmse_nat <- apply(pp_nat, 1, function(x) rmse(pred=x,obs=fit_nat$data$naturalized))
mean(rmse_nat)
## [1] 7.162065
sd(rmse_nat)
## [1] 0.9255925
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(fit_nat, hyp, class = NULL))
## Hypothesis Tests for class :
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0     0.81      0.15     0.45        1         NA
##   Post.Prob Star
## 1        NA    *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
# simplified threat model
outcome <- summary(vet_threatened_nat$fit, prob=c(0.025,0.10, 0.90,0.975))
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
to_print <- data.frame(to_print)

names1 <- c("Number of naturalized species","Diversification rate","Family age","Range size",paste("\u03C3", "Brownian effects", sep=" "),paste("\u03C3", "family specific effects", sep=" "))
to_print$variable <- as.factor(names1)


p <-  ggplot(to_print, aes(x=variable, y=mean)) + scale_x_discrete(limits = rev(levels(to_print$variable))) +
      geom_pointrange(aes(ymin=X2.5., ymax=X97.5.), colour="darkgrey", linetype="dotted") + 
      geom_pointrange(aes(ymin=X10., ymax=X90.), colour="black") + 
      coord_flip()  + 
      theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black")) + 
      geom_hline(yintercept = 0) + xlab("") + ylab("") +
      theme(legend.title = element_blank()) 
p

ggsave("./plots_tables/threat_nat_forest.pdf", p , height=3,width=5)
ggsave("./plots_tables/threat_nat_forest.png", p , height=3,width=5)


# combined Threat and Naturalized
threat_out <- summary(vet_threatened$fit, prob=c(0.025,0.10, 0.90,0.975))
threat_out <- threat_out$summary[grep("^[sd,b]_*",rownames(threat_out$summary)),]
threat_out <- threat_out[-1,]
threat_out <- data.frame(threat_out)
threat_out$group <- rev(letters[1:nrow(threat_out)])
threat_out$model <- "Threatened"

nat_out <- summary(fit_nat$fit, prob=c(0.025,0.10, 0.90,0.975))
nat_out <- nat_out$summary[grep("^[sd,b]_*",rownames(nat_out$summary)),]
nat_out <- nat_out[-1,]
nat_out <- data.frame(nat_out)
nat_out$group <- rev(letters[1:nrow(nat_out)])
nat_out$model <- "Naturalized"

labels=c("Diversification rate","Family age", "Range size", "Hybrids", "Animal pollination", "Fleshy fruits", "Climate range variability", "Herbaceous","Annual","Asexual seed production", paste("\u03C3", "Brownian effects", sep=" "),paste("\u03C3", "family specific effects", sep=" "))

threat_out$variable <- labels
nat_out$variable <- labels

coefs <- rbind(threat_out, nat_out)

coefs$overlap <- ((coefs$X2.5. > 0) == (coefs$X97.5. > 0))
coefs$overlap[coefs$overlap==TRUE] <- "n"
coefs$overlap[coefs$overlap==FALSE] <- "y"


top <- coefs$mean[1:(length(coefs$mean)/2)]
bottom <- coefs$mean[((length(coefs$mean)/2)+1):length(coefs$mean)]
coefs$opposite <- ((top > 0) == (bottom > 0))
coefs$opposite[coefs$opposite==TRUE] <- "n"
coefs$opposite[coefs$opposite==FALSE] <- "y"

coefs$both <- paste0(coefs$overlap,coefs$opposite)

labels<-rev(labels)

cols<-c("#88b7c5","#a36666")

p2 <- ggplot(coefs, aes(x=group, y=mean, color=model)) + scale_shape_manual(values=c(17,15,17,15)) + scale_alpha_manual(values=c(1,1,0.4,0.4)) +
  geom_pointrange(aes(ymin=X2.5., ymax=X97.5.,shape=both,alpha=both),fatten = 0,linetype="dotted") + 
  geom_pointrange(aes(ymin=X10., ymax=X90.,shape=both,alpha=both), size=0.75) + 

  coord_flip()  + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_line(colour = "black")) + 
  geom_hline(yintercept = 0)  + xlab("") + ylab("") + theme(legend.position = "none") +
theme(legend.title = element_blank()) + scale_x_discrete(labels=labels) + guides(shape = FALSE, size = FALSE) + scale_colour_manual(values=cols)

p2             

ggsave("./plots_tables/combined_forest.pdf", p2 , height=5,width=6)
ggsave("./plots_tables/combined_forest.png", p2 , height=5,width=6)

Sensitivity analyses

Simple threat model: without range size (custom priors)

if (!file.exists("./vet_threatened_naturalized_norange_10k.rds")) {

  vet_threatened_nat_norange <- brm(threatened | trials(vetted) ~ naturalized_scaled + diversification_scaled + age_scaled + (1|family) + (1|family_name), 
    data=data_vetted, family=binomial(),
    iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
    control=list(adapt_delta=0.95,max_treedepth=12), cores=4)

  saveRDS(vet_threatened_nat_norange, "./vet_threatened_naturalized_norange_10k.rds")

} else { vet_threatened_nat_norange <- readRDS("./vet_threatened_naturalized_norange_10k.rds")}

# prior_summary(vet_threatened_nat_norange, all=TRUE)

outcome <- summary(vet_threatened_nat_norange$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Number of naturalized species","Diversification rate","Family age", "SD brownian effects","SD family specific effects")

vet_threatened_nat_norange_plot <- stanplot(vet_threatened_nat_norange, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
vet_threatened_nat_norange_plot

pdf("./plots_tables/vet_threatened_nat_norange_plot.pdf", width=5, height=4)
vet_threatened_nat_norange_plot + ggtitle("")
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Number of naturalized species -0.65 0.00 0.19 -1.02 -0.78 -0.65 -0.52 -0.27 2340.07 1.00
Diversification rate 1.47 0.01 0.28 0.94 1.28 1.47 1.65 2.02 2169.19 1.00
Family age 0.79 0.00 0.22 0.34 0.64 0.79 0.93 1.22 3196.43 1.00
SD brownian effects 0.74 0.01 0.21 0.23 0.61 0.76 0.90 1.08 588.46 1.01
SD family specific effects 0.52 0.01 0.20 0.07 0.40 0.54 0.66 0.84 529.60 1.01
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_threatened_nat_norange.tex")

##Posterior predictive checks
pp_vet_threatened_nat_norange <- brms::posterior_predict(vet_threatened_nat_norange)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_threatened_nat_norange$data$threatened, pp_vet_threatened_nat_norange[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_vet_threatened_nat_norange <- apply(pp_vet_threatened_nat_norange, 1, function(x) nrmse(pred=x,obs=vet_threatened_nat_norange$data$threatened, type="iq"))
mean(nrmse_vet_threatened_nat_norange)
## [1] 0.3107234
sd(nrmse_vet_threatened_nat_norange)
## [1] 0.0352125
# RMSE
rmse_vet_threatened_nat_norange <- apply(pp_vet_threatened_nat_norange, 1, function(x) rmse(pred=x,obs=vet_threatened_nat_norange$data$threatened))
mean(rmse_vet_threatened_nat_norange)
## [1] 6.525209
sd(rmse_vet_threatened_nat_norange)
## [1] 0.7394051
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(vet_threatened_nat_norange, hyp, class = NULL))
## Hypothesis Tests for class :
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0     0.63      0.25     0.07        1         NA
##   Post.Prob Star
## 1        NA    *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

Alternative Family Effects Structures

Full threat model: no family level effects

if (!file.exists("./vet_threatened_nofam_10k.rds")) {

  vet_threatened_nofam <- brm(threatened | trials(vetted) ~ diversification_scaled + age_scaled + range.size +  hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual, 
    data=data_vetted, family=binomial(), 
    iter=10000, thin=4,
    control=list(adapt_delta=0.95,max_treedepth=13),cores=4)

  saveRDS(vet_threatened_nofam, "./vet_threatened_nofam_10k.rds")

} else { vet_threatened_nofam <- readRDS("./vet_threatened_nofam_10k.rds")}

outcome <- summary(vet_threatened_nofam$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual")

vet_threatened_nofam_plot <- stanplot(vet_threatened_nofam, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion threatened species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
vet_threatened_nofam_plot

pdf("./plots_tables/vet_threatened_nofam_plot.pdf", width=5, height=4)
vet_threatened_nofam_plot + ggtitle("")
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate 1.08 0 0.07 0.94 1.03 1.08 1.12 1.21 4776.17 1
Famly age 1.02 0 0.06 0.90 0.97 1.02 1.06 1.14 4783.33 1
Range size -0.21 0 0.06 -0.33 -0.25 -0.21 -0.16 -0.08 4901.92 1
Hybrids -0.19 0 0.04 -0.27 -0.21 -0.19 -0.16 -0.10 5002.25 1
Animal pollinated 0.91 0 0.05 0.82 0.88 0.91 0.94 1.00 4504.78 1
Fleshy fruit -0.50 0 0.06 -0.62 -0.54 -0.50 -0.46 -0.38 4748.86 1
Climate sum -0.17 0 0.02 -0.22 -0.19 -0.17 -0.16 -0.13 5093.39 1
Herbaceous 0.02 0 0.03 -0.05 -0.01 0.02 0.04 0.09 4940.66 1
Annual 0.05 0 0.03 -0.02 0.03 0.05 0.08 0.12 4901.13 1
Axsexual -0.26 0 0.03 -0.31 -0.27 -0.26 -0.24 -0.20 5115.10 1
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_threatened_nofam.tex")


##Posterior predictive checks
# first make posterior predictions
pp_threatened_nofam <- brms::posterior_predict(vet_threatened_nofam)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_threatened_nofam$data$threatened, pp_threatened_nofam[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_threatened_nofam <- apply(pp_threatened_nofam, 1, function(x) nrmse(pred=x,obs=vet_threatened_nofam$data$threatened, type="iq"))
mean(nrmse_threatened_nofam)
## [1] 1.140331
sd(nrmse_threatened_nofam)
## [1] 0.05214951
# RMSE
rmse_threatened_nofam <- apply(pp_threatened_nofam, 1, function(x) rmse(pred=x,obs=vet_threatened_nofam$data$threatened))
mean(rmse_threatened_nofam)
## [1] 23.9469
sd(rmse_threatened_nofam)
## [1] 1.095118

Full threat model: only non-brownian family effects

if (!file.exists("./vet_threatened_nophylo_10k.rds")) {

  vet_threatened_nophylo <- brm(threatened | trials(vetted) ~ diversification_scaled + age_scaled + range.size + hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual + (1|family_name), 
    data=data_vetted, family=binomial(), 
    iter=10000, thin=4,
    control=list(adapt_delta=0.95,max_treedepth=13),cores=4)

  saveRDS(vet_threatened_nophylo, "./vet_threatened_nophylo_10k.rds")

} else { vet_threatened_nophylo <- readRDS("./vet_threatened_nophylo_10k.rds")}

outcome <- summary(vet_threatened_nophylo$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual","SD brownian effects")

vet_threatened_nophylo_plot <- stanplot(vet_threatened_nophylo, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion threatened species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
vet_threatened_nophylo_plot

pdf("./plots_tables/vet_threatened_nophylo_plot.pdf", width=5, height=4)
vet_threatened_nophylo_plot + ggtitle("")
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate 1.74 0 0.26 1.24 1.57 1.73 1.92 2.27 3807.45 1
Famly age 1.01 0 0.21 0.60 0.87 1.01 1.15 1.43 3957.77 1
Range size -0.82 0 0.23 -1.26 -0.97 -0.81 -0.66 -0.37 3648.97 1
Hybrids -0.22 0 0.12 -0.46 -0.30 -0.21 -0.13 0.03 4253.90 1
Animal pollinated 0.71 0 0.20 0.32 0.57 0.71 0.84 1.09 4502.85 1
Fleshy fruit -0.10 0 0.19 -0.48 -0.22 -0.10 0.03 0.27 4816.39 1
Climate sum -0.05 0 0.08 -0.21 -0.11 -0.05 0.00 0.10 4231.89 1
Herbaceous -0.05 0 0.16 -0.36 -0.15 -0.04 0.06 0.27 4199.60 1
Annual 0.06 0 0.17 -0.26 -0.06 0.06 0.17 0.39 3215.16 1
Axsexual -0.10 0 0.14 -0.37 -0.20 -0.11 -0.01 0.18 3430.90 1
SD brownian effects 0.79 0 0.06 0.69 0.76 0.79 0.83 0.91 3732.53 1
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_threatened_nophylo.tex")


##Posterior predictive checks
pp_threatened_nophylo <- brms::posterior_predict(vet_threatened_nophylo)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_threatened_nophylo$data$threatened, pp_threatened_nophylo[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_threatened_nophylo <- apply(pp_threatened_nophylo, 1, function(x) nrmse(pred=x,obs=vet_threatened_nophylo$data$threatened, type="iq"))
mean(nrmse_threatened_nophylo)
## [1] 0.3115518
sd(nrmse_threatened_nophylo)
## [1] 0.03584301
# RMSE
rmse_threatened_nophylo <- apply(pp_threatened_nophylo, 1, function(x) rmse(pred=x,obs=vet_threatened_nophylo$data$threatened))
mean(rmse_threatened_nophylo)
## [1] 6.54262
sd(rmse_threatened_nophylo)
## [1] 0.7526886

Full threat model: only brownian family effects

if (!file.exists("./vet_threatened_phylo_10k.rds")) {

  vet_threatened_phylo <- brm(threatened | trials(vetted) ~ diversification_scaled + age_scaled + range.size + hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual+ (1|family), 
    data=data_vetted, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov),
    control=list(adapt_delta=0.95,max_treedepth=13),cores=4)

  saveRDS(vet_threatened_phylo, "./vet_threatened_phylo_10k.rds")

} else { vet_threatened_phylo <- readRDS("./vet_threatened_phylo_10k.rds")}

outcome <- summary(vet_threatened_phylo$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual","SD brownian effects")

vet_threatened_phylo_plot <- stanplot(vet_threatened_phylo, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion threatened species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
vet_threatened_phylo_plot

pdf("./plots_tables/vet_threatened_phylo_plot.pdf", width=5, height=4)
vet_threatened_phylo_plot + ggtitle("")
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate 1.51 0 0.25 1.03 1.35 1.51 1.67 2.02 3652.24 1
Famly age 0.86 0 0.23 0.41 0.71 0.85 1.01 1.30 3636.21 1
Range size -0.66 0 0.22 -1.11 -0.81 -0.66 -0.51 -0.23 4159.18 1
Hybrids -0.22 0 0.12 -0.45 -0.30 -0.22 -0.14 0.00 4655.58 1
Animal pollinated 0.62 0 0.21 0.20 0.47 0.61 0.76 1.05 4121.14 1
Fleshy fruit -0.11 0 0.19 -0.50 -0.24 -0.11 0.02 0.26 4023.19 1
Climate sum -0.04 0 0.08 -0.21 -0.10 -0.04 0.01 0.12 4214.25 1
Herbaceous -0.02 0 0.18 -0.37 -0.15 -0.02 0.10 0.34 3794.82 1
Annual 0.08 0 0.16 -0.24 -0.03 0.08 0.19 0.41 3148.23 1
Axsexual -0.13 0 0.14 -0.40 -0.22 -0.13 -0.03 0.14 3646.58 1
SD brownian effects 0.98 0 0.07 0.85 0.93 0.98 1.02 1.12 3759.27 1
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_threatened_phylo.tex")


##Posterior predictive checks
pp_threatened_phylo <- brms::posterior_predict(vet_threatened_phylo)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_threatened_phylo$data$threatened, pp_threatened_phylo[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_threatened_phylo <- apply(pp_threatened_phylo, 1, function(x) nrmse(pred=x,obs=vet_threatened_phylo$data$threatened, type="iq"))
mean(nrmse_threatened_phylo)
## [1] 0.3114918
sd(nrmse_threatened_phylo)
## [1] 0.03569686
# RMSE
rmse_threatened_phylo <- apply(pp_threatened_phylo, 1, function(x) rmse(pred=x,obs=vet_threatened_phylo$data$threatened))
mean(rmse_threatened_phylo)
## [1] 6.541305
sd(rmse_threatened_phylo)
## [1] 0.7495942

Full threat model (total species richness - not vetted by IUCN)

if (!file.exists("./nonvet_threatened_10k.rds")) {

  nonvet_threatened <- brm(threatened | trials(species) ~ diversification_scaled + age_scaled + range.size + hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual + (1|family) + (1|family_name), 
    data=data, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov),
    control=list(adapt_delta=0.99,max_treedepth=13),cores=4)

  saveRDS(nonvet_threatened, "./nonvet_threatened_10k.rds")

} else { nonvet_threatened <- readRDS("./nonvet_threatened_10k.rds")}

outcome <- summary(nonvet_threatened$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual","SD brownian effects","SD family specific effects")

nonvet_threatened_plot <- stanplot(nonvet_threatened, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion threatened species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
nonvet_threatened_plot

pdf("./plots_tables/nonvet_threatened_plot.pdf", width=5, height=4)
nonvet_threatened_plot + ggtitle("")
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate 0.71 0.00 0.32 0.11 0.50 0.70 0.92 1.34 4042.20 1.00
Famly age 0.48 0.00 0.25 0.00 0.31 0.48 0.65 0.97 4323.14 1.00
Range size -0.33 0.00 0.28 -0.87 -0.52 -0.33 -0.14 0.23 4493.81 1.00
Hybrids -0.04 0.00 0.15 -0.33 -0.14 -0.04 0.06 0.25 4416.26 1.00
Animal pollinated 0.15 0.00 0.26 -0.37 -0.02 0.15 0.32 0.65 4565.73 1.00
Fleshy fruit -0.07 0.00 0.23 -0.51 -0.23 -0.07 0.08 0.37 4374.02 1.00
Climate sum -0.27 0.00 0.10 -0.47 -0.34 -0.27 -0.20 -0.07 4492.38 1.00
Herbaceous -0.80 0.00 0.21 -1.22 -0.95 -0.81 -0.66 -0.38 3410.76 1.00
Annual -0.41 0.00 0.21 -0.82 -0.55 -0.41 -0.27 -0.01 4601.07 1.00
Axsexual 0.23 0.00 0.18 -0.13 0.10 0.23 0.35 0.59 4197.47 1.00
SD brownian effects 0.49 0.01 0.25 0.04 0.30 0.50 0.67 0.97 373.84 1.02
SD family specific effects 0.99 0.00 0.11 0.74 0.92 1.00 1.07 1.19 689.53 1.01
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/nonvet_threatened.tex")

##Posterior predictive checks
pp_threatened <- brms::posterior_predict(nonvet_threatened)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=nonvet_threatened$data$threatened, pp_threatened[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_nonvet <- apply(pp_threatened, 1, function(x) nrmse(pred=x,obs=nonvet_threatened$data$threatened, type="iq"))
mean(nrmse_nonvet)
## [1] 0.4715996
sd(nrmse_nonvet)
## [1] 0.05317477
# RMSE
rmse_nonvet <- apply(pp_threatened, 1, function(x) rmse(pred=x,obs=nonvet_threatened$data$threatened))
mean(rmse_nonvet)
## [1] 7.54555
sd(rmse_nonvet)
## [1] 0.8506925
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(nonvet_threatened, hyp, class = NULL))
## Hypothesis Tests for class :
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0     0.23      0.17        0     0.62         NA
##   Post.Prob Star
## 1        NA    *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

Full models with MOM diversification estimator

Full threat model (IUCN vetted species) - MOM

if (!file.exists("./vet_threatened_MOM_10k.rds")) {

  vet_threatened_MOM <- brm(threatened | trials(vetted) ~ diversification_mom_scaled + age_scaled + range.size + hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual + (1|family) + (1|family_name), 
    data=data_vetted, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
    control=list(adapt_delta=0.98,max_treedepth=13),cores=4)

  # no divergent transitions

  saveRDS(vet_threatened_MOM, "./vet_threatened_MOM_10k.rds")

} else { vet_threatened_MOM <- readRDS("./vet_threatened_MOM_10k.rds")}

# prior_summary(vet_threatened_MOM, all=FALSE)

outcome <- summary(vet_threatened_MOM$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate (MOM)","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual","SD brownian effects","SD family specific effects")

vet_threatened_MOM_plot <- stanplot(vet_threatened_MOM, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion threatened species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
vet_threatened_MOM_plot

pdf("./plots_tables/vet_threatened_MOM_plot.pdf", width=5, height=4)
vet_threatened_MOM_plot + ggtitle("")
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate (MOM) 1.40 0.00 0.22 0.97 1.25 1.40 1.55 1.85 4187.89 1.00
Famly age 0.65 0.00 0.19 0.27 0.52 0.65 0.78 1.02 4370.33 1.00
Range size -0.75 0.00 0.23 -1.20 -0.91 -0.74 -0.58 -0.29 5028.09 1.00
Hybrids -0.21 0.00 0.12 -0.45 -0.29 -0.21 -0.13 0.03 4951.56 1.00
Animal pollinated 0.70 0.00 0.21 0.30 0.56 0.70 0.85 1.11 4724.15 1.00
Fleshy fruit -0.13 0.00 0.19 -0.51 -0.26 -0.13 0.00 0.26 4792.09 1.00
Climate sum -0.05 0.00 0.08 -0.22 -0.11 -0.05 0.01 0.11 5133.96 1.00
Herbaceous -0.02 0.00 0.18 -0.37 -0.14 -0.02 0.09 0.33 5114.28 1.00
Annual 0.05 0.00 0.17 -0.27 -0.06 0.05 0.16 0.39 4683.03 1.00
Axsexual -0.14 0.00 0.14 -0.42 -0.23 -0.14 -0.04 0.14 4964.23 1.00
SD brownian effects 0.52 0.01 0.24 0.05 0.35 0.53 0.70 0.96 519.18 1.01
SD family specific effects 0.62 0.01 0.16 0.20 0.54 0.65 0.74 0.85 577.42 1.00
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_threatened_MOM.tex")

##Posterior predictive checks
# first make posterior predictions
pp_threatened <- brms::posterior_predict(vet_threatened_MOM)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_threatened_MOM$data$threatened, pp_threatened[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_vet <- apply(pp_threatened, 1, function(x) nrmse(pred=x,obs=vet_threatened_MOM$data$threatened, type="iq"))
mean(nrmse_vet)
## [1] 0.3100034
sd(nrmse_vet)
## [1] 0.03550366
# RMSE
rmse_vet <- apply(pp_threatened, 1, function(x) rmse(pred=x,obs=vet_threatened_MOM$data$threatened))
mean(rmse_vet)
## [1] 6.509996
sd(rmse_vet)
## [1] 0.7455832
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(vet_threatened_MOM, hyp, class = NULL))
## Hypothesis Tests for class :
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0     0.42      0.27        0     0.96         NA
##   Post.Prob Star
## 1        NA    *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
# plot(hyp)

Naturalized model - MOM

if (!file.exists("./fit_nat_MOM_10k.rds")) {

  fit_nat_MOM <- brm(naturalized | trials(species) ~ diversification_mom_scaled + age_scaled + range.size + hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual + (1|family) + (1|family_name), 
    data=data, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov),
    control=list(adapt_delta=0.95, max_treedepth=11),cores=4)

  saveRDS(fit_nat_MOM, "./fit_nat_MOM_10k.rds")

} else { fit_nat_MOM <- readRDS("./fit_nat_MOM_10k.rds")}

outcome <- summary(fit_nat_MOM$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate (MOM)","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual","SD brownian effects","SD family specific effects")

fit_nat_MOM_plot <- stanplot(fit_nat_MOM, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))+ ggtitle("Proportion of naturalized species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
fit_nat_MOM_plot

pdf("./plots_tables/fit_nat_MOM_plot.pdf", width=5, height=4)
fit_nat_MOM_plot + ggtitle("")
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate (MOM) -2.00 0.00 0.25 -2.49 -2.17 -2.00 -1.83 -1.51 4574.00 1
Famly age -1.12 0.00 0.23 -1.56 -1.27 -1.11 -0.96 -0.68 4865.63 1
Range size 1.58 0.00 0.27 1.05 1.40 1.57 1.76 2.13 4432.73 1
Hybrids 0.25 0.00 0.13 -0.01 0.16 0.25 0.33 0.50 4760.78 1
Animal pollinated -0.47 0.00 0.28 -1.02 -0.65 -0.46 -0.28 0.07 4641.26 1
Fleshy fruit -0.03 0.00 0.24 -0.51 -0.19 -0.03 0.13 0.44 4890.97 1
Climate sum 0.18 0.00 0.10 -0.02 0.11 0.18 0.24 0.38 5019.20 1
Herbaceous 0.66 0.00 0.22 0.22 0.52 0.66 0.81 1.10 4700.80 1
Annual 0.11 0.00 0.20 -0.27 -0.02 0.11 0.24 0.50 4658.60 1
Axsexual 0.41 0.00 0.19 0.05 0.29 0.41 0.53 0.77 4937.08 1
SD brownian effects 1.21 0.01 0.18 0.83 1.09 1.22 1.33 1.51 1092.51 1
SD family specific effects 0.51 0.01 0.22 0.04 0.36 0.54 0.68 0.89 653.70 1
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/fit_nat_MOM.tex")

##Posterior predictive checks
pp_nat <- brms::posterior_predict(fit_nat_MOM)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=fit_nat_MOM$data$naturalized, pp_nat[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_nat <- apply(pp_nat, 1, function(x) nrmse(pred=x,obs=fit_nat_MOM$data$naturalized, type="iq"))
mean(nrmse_nat)
## [1] 0.5091612
sd(nrmse_nat)
## [1] 0.0639227
# RMSE
rmse_nat <- apply(pp_nat, 1, function(x) rmse(pred=x,obs=fit_nat_MOM$data$naturalized))
mean(rmse_nat)
## [1] 7.128302
sd(rmse_nat)
## [1] 0.894913
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(fit_nat_MOM, hyp, class = NULL))
## Hypothesis Tests for class :
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0     0.81      0.14     0.49        1         NA
##   Post.Prob Star
## 1        NA    *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

Full models with number of horticultural species

Full threat model (IUCN vetted species) - Horticultural use

if (!file.exists("./vet_threatened_hort_10k.rds")) {

  vet_threatened_hort <- brm(threatened | trials(vetted) ~ diversification_scaled + age_scaled + range.size + hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual + hort.species + (1|family) + (1|family_name), 

    data=data_vetted, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
    control=list(adapt_delta=0.98,max_treedepth=13),cores=4)

  # no divergent transitions

  saveRDS(vet_threatened_hort, "./vet_threatened_hort_10k.rds")

} else { vet_threatened_hort <- readRDS("./vet_threatened_hort_10k.rds")}

# prior_summary(vet_threatened_hort, all=FALSE)

outcome <- summary(vet_threatened_hort$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual","Horticultural species", "SD brownian effects","SD family specific effects")

vet_threatened_hort_plot <- stanplot(vet_threatened_hort, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion threatened species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
vet_threatened_hort_plot

pdf("./plots_tables/vet_threatened_hort_plot.pdf", width=5, height=4)
vet_threatened_hort_plot + ggtitle("")
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate 1.72 0.00 0.29 1.16 1.53 1.72 1.92 2.29 3387.75 1.00
Famly age 0.96 0.00 0.23 0.52 0.80 0.96 1.11 1.40 3284.76 1.00
Range size -0.66 0.00 0.24 -1.11 -0.82 -0.66 -0.49 -0.19 4487.16 1.00
Hybrids -0.21 0.00 0.12 -0.44 -0.29 -0.21 -0.13 0.02 4580.59 1.00
Animal pollinated 0.64 0.00 0.21 0.24 0.50 0.64 0.78 1.06 4008.94 1.00
Fleshy fruit -0.08 0.00 0.19 -0.47 -0.21 -0.08 0.04 0.29 4554.09 1.00
Climate sum -0.03 0.00 0.08 -0.20 -0.09 -0.03 0.03 0.13 4676.35 1.00
Herbaceous -0.02 0.00 0.18 -0.37 -0.14 -0.02 0.09 0.33 4747.66 1.00
Annual 0.09 0.00 0.17 -0.23 -0.02 0.09 0.20 0.42 4483.79 1.00
Axsexual -0.08 0.00 0.14 -0.37 -0.18 -0.08 0.01 0.20 4618.21 1.00
Horticultural species -0.21 0.00 0.23 -0.66 -0.36 -0.21 -0.05 0.24 4067.48 1.00
SD brownian effects 0.46 0.01 0.25 0.03 0.27 0.47 0.65 0.92 313.14 1.01
SD family specific effects 0.64 0.01 0.15 0.25 0.57 0.67 0.75 0.86 359.42 1.01
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_threatened_hort.tex")

##Posterior predictive checks
# first make posterior predictions
pp_threatened <- brms::posterior_predict(vet_threatened_hort)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_threatened_hort$data$threatened, pp_threatened[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_vet <- apply(pp_threatened, 1, function(x) nrmse(pred=x,obs=vet_threatened_hort$data$threatened, type="iq"))
mean(nrmse_vet)
## [1] 0.3113806
sd(nrmse_vet)
## [1] 0.03590083
# RMSE
rmse_vet <- apply(pp_threatened, 1, function(x) rmse(pred=x,obs=vet_threatened_hort$data$threatened))
mean(rmse_vet)
## [1] 6.538981
sd(rmse_vet)
## [1] 0.7538275
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(vet_threatened_hort, hyp, class = NULL))
## Hypothesis Tests for class :
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0     0.36      0.27        0     0.93         NA
##   Post.Prob Star
## 1        NA    *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
# plot(hyp)

Naturalized model - horticultural species

if (!file.exists("./fit_nat_hort_10k.rds")) {

  fit_nat_hort <- brm(naturalized | trials(species) ~ diversification_scaled + age_scaled + range.size + hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual + hort.species + (1|family) + (1|family_name), 
    data=data, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov),
    control=list(adapt_delta=0.9, max_treedepth=11),cores=4)

  saveRDS(fit_nat_hort, "./fit_nat_hort_10k.rds")

} else { fit_nat_hort <- readRDS("./fit_nat_hort_10k.rds")}

outcome <- summary(fit_nat_hort$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual", "Horticultural species", "SD brownian effects","SD family specific effects")

fit_nat_hort_plot <- stanplot(fit_nat_hort, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))+ ggtitle("Proportion of naturalized species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
fit_nat_hort_plot

pdf("./plots_tables/fit_nat_hort_plot.pdf", width=5, height=4)
fit_nat_hort_plot + ggtitle("")
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate -4.00 0.00 0.25 -4.48 -4.18 -4.01 -3.84 -3.50 3483.62 1
Famly age -2.31 0.00 0.19 -2.68 -2.45 -2.32 -2.18 -1.93 4079.04 1
Range size 0.25 0.00 0.23 -0.20 0.09 0.25 0.40 0.68 4774.36 1
Hybrids 0.19 0.00 0.10 -0.01 0.12 0.19 0.25 0.37 4804.81 1
Animal pollinated 0.03 0.00 0.21 -0.38 -0.10 0.03 0.17 0.44 4811.80 1
Fleshy fruit -0.16 0.00 0.19 -0.54 -0.29 -0.16 -0.03 0.21 3789.03 1
Climate sum -0.03 0.00 0.08 -0.19 -0.08 -0.03 0.03 0.13 4748.73 1
Herbaceous 0.56 0.00 0.16 0.24 0.45 0.56 0.67 0.89 4646.70 1
Annual -0.30 0.00 0.15 -0.59 -0.41 -0.30 -0.20 -0.02 4679.69 1
Axsexual -0.06 0.00 0.14 -0.33 -0.15 -0.06 0.03 0.20 4470.14 1
Horticultural species 3.32 0.00 0.25 2.82 3.14 3.32 3.49 3.80 4728.62 1
SD brownian effects 0.60 0.01 0.20 0.13 0.48 0.62 0.74 0.96 662.18 1
SD family specific effects 0.55 0.01 0.15 0.16 0.48 0.57 0.65 0.78 643.33 1
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/fit_nat_hort.tex")

##Posterior predictive checks
pp_nat <- brms::posterior_predict(fit_nat_hort)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=fit_nat_hort$data$naturalized, pp_nat[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_nat <- apply(pp_nat, 1, function(x) nrmse(pred=x,obs=fit_nat_hort$data$naturalized, type="iq"))
mean(nrmse_nat)
## [1] 0.5103294
sd(nrmse_nat)
## [1] 0.0656822
# RMSE
rmse_nat <- apply(pp_nat, 1, function(x) rmse(pred=x,obs=fit_nat_hort$data$naturalized))
mean(rmse_nat)
## [1] 7.144582
sd(rmse_nat)
## [1] 0.9195189
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(fit_nat_hort, hyp, class = NULL))
## Hypothesis Tests for class :
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0     0.52      0.24     0.03     0.97         NA
##   Post.Prob Star
## 1        NA    *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.